Create a standalone 3-node Ceph cluster and connect it to a Rook provisioner

1. Via OpenStack, create three new nodes (the recommended minimum).

2. On ceph-node-1, install the Ceph admin tool and bootstrap the cluster:
rbarnsley@ceph-node-1:~$ sudo apt install -y cephadm
rbarnsley@ceph-node-1:~$ sudo cephadm bootstrap --mon-ip <ceph-node-1-ip>
3. Install the Ceph CLI:
rbarnsley@ceph-node-1:~$ sudo cephadm add-repo --release pacific
rbarnsley@ceph-node-1:~$ sudo cephadm install ceph-common
4. Install the prerequisites on the other nodes, e.g. for ceph-node-2 (repeat for ceph-node-3):
rbarnsley@ceph-node-2:~$ sudo apt install docker.io
5. Add the ceph-node-1 Ceph public key (/etc/ceph/ceph.pub) to the root account's authorized_keys on ceph-node-2 and ceph-node-3, for example as shown below.
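One way to do this, assuming root SSH logins are allowed on the target nodes, is ssh-copy-id from ceph-node-1:

rbarnsley@ceph-node-1:~$ sudo ssh-copy-id -f -i /etc/ceph/ceph.pub root@ceph-node-2
rbarnsley@ceph-node-1:~$ sudo ssh-copy-id -f -i /etc/ceph/ceph.pub root@ceph-node-3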

6. Add the nodes to the cluster:

rbarnsley@ceph-node-1:~$ sudo ceph orch host add ceph-node-2 <ceph-node-2-ip>
rbarnsley@ceph-node-1:~$ sudo ceph orch host add ceph-node-3 <ceph-node-3-ip>

rbarnsley@ceph-node-1:~$ sudo ceph orch host ls
HOST         ADDR              LABELS  STATUS
ceph-node-1  ceph-node-1
ceph-node-2  <ceph-node-2-ip>
ceph-node-3  <ceph-node-3-ip>
7. Set up the dashboard (SSL is disabled here; the module is toggled off and on so the setting takes effect):
rbarnsley@ceph-node-1:~$ sudo ceph config set mgr mgr/dashboard/ssl false
rbarnsley@ceph-node-1:~$ sudo ceph mgr module disable dashboard
rbarnsley@ceph-node-1:~$ sudo ceph mgr module enable dashboard
rbarnsley@ceph-node-1:~$ echo <password> > password
rbarnsley@ceph-node-1:~$ sudo ceph dashboard ac-user-create <user> -i password administrator
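To confirm where the dashboard ended up listening (it should be port 8080 with SSL disabled), the active manager's service URLs can be listed:

rbarnsley@ceph-node-1:~$ sudo ceph mgr services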
8. Add a route to the nginx reverse proxy (optional; only needed if you are using a reverse proxy):

rbarnsley@proxy:~$ vim /etc/nginx/sites-available/default

location /ceph/ {
        proxy_pass http://<ceph-node-1-ip>:8080/;
        proxy_redirect off;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_cache_bypass $http_upgrade;

        access_log /root/logs/ceph-access.log;
        error_log /root/logs/ceph-error.log;
}
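After editing the site config, validate it and reload nginx so the new location takes effect:

rbarnsley@proxy:~$ sudo nginx -t
rbarnsley@proxy:~$ sudo systemctl reload nginx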
9. Via OpenStack, create nine 500 GB volumes for object storage (RBD). Attach three to each of the three nodes.
10. Add each node's attached volumes as object storage daemons (OSDs):
rbarnsley@ceph-node-1:~$ sudo ceph orch device ls --refresh
Hostname     Path      Type  Serial                Size   Health   Ident  Fault  Available  
ceph-node-1  /dev/vdb  hdd   cb3134f3-8f16-478d-b   536G  Unknown  N/A    N/A    Yes         
ceph-node-1  /dev/vdc  hdd   ae6c996c-5084-4db0-a   536G  Unknown  N/A    N/A    Yes         
ceph-node-1  /dev/vdd  hdd   4d8b0d9c-965e-4d80-8   536G  Unknown  N/A    N/A    Yes         
ceph-node-2  /dev/vdb  hdd   80af6067-1a1a-4d75-8   536G  Unknown  N/A    N/A    Yes         
ceph-node-2  /dev/vdc  hdd   62b54ad8-f5c2-4c74-a   536G  Unknown  N/A    N/A    Yes         
ceph-node-2  /dev/vdd  hdd   45aada28-fa56-4a84-8   536G  Unknown  N/A    N/A    Yes         
ceph-node-3  /dev/vdb  hdd   8dbc2a6e-d76d-4da3-b   536G  Unknown  N/A    N/A    Yes         
ceph-node-3  /dev/vdc  hdd   6a8b9837-ccfc-40db-9   536G  Unknown  N/A    N/A    Yes         
ceph-node-3  /dev/vdd  hdd   f58bd5d2-f7af-41d2-a   536G  Unknown  N/A    N/A    Yes         

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-1:/dev/vdb
Created osd(s) 0 on host 'ceph-node-1'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-1:/dev/vdc
Created osd(s) 1 on host 'ceph-node-1'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-1:/dev/vdd
Created osd(s) 2 on host 'ceph-node-1'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-2:/dev/vdb
Created osd(s) 3 on host 'ceph-node-2'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-2:/dev/vdc
Created osd(s) 4 on host 'ceph-node-2'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-2:/dev/vdd
Created osd(s) 5 on host 'ceph-node-2'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-3:/dev/vdb
Created osd(s) 6 on host 'ceph-node-3'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-3:/dev/vdc
Created osd(s) 7 on host 'ceph-node-3'

rbarnsley@ceph-node-1:~$ sudo ceph orch daemon add osd ceph-node-3:/dev/vdd
Created osd(s) 8 on host 'ceph-node-3'
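Adding each device by hand, as above, keeps things explicit. Alternatively, cephadm can consume every eligible device in one go (note this will also claim any device that later shows up as available):

rbarnsley@ceph-node-1:~$ sudo ceph orch apply osd --all-available-devices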
11. Check that all OSDs are attached:
rbarnsley@ceph-node-1:~$ sudo ceph df
--- RAW STORAGE ---
CLASS  SIZE     AVAIL    USED    RAW USED  %RAW USED
hdd    4.4 TiB  4.4 TiB  84 MiB   9.1 GiB       0.20
TOTAL  4.4 TiB  4.4 TiB  84 MiB   9.1 GiB       0.20
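The CRUSH tree is another quick sanity check that the nine OSDs are spread three per host:

rbarnsley@ceph-node-1:~$ sudo ceph osd tree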
12. Add monitor daemons to the other nodes for fault resilience (a minimum of three is required for quorum):
rbarnsley@ceph-node-1:~$ sudo ceph orch apply mon
Scheduled mon update...
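With no arguments, ceph orch apply mon uses the default placement. To pin the monitors explicitly to the three hosts and then check the monitor quorum, something like the following should work:

rbarnsley@ceph-node-1:~$ sudo ceph orch apply mon --placement="3 ceph-node-1 ceph-node-2 ceph-node-3"
rbarnsley@ceph-node-1:~$ sudo ceph mon stat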
13. Get rid of historic errors (not necessary if your cluster's health already reads HEALTH_OK!). Two issues were cleaned up here:
A daemon crashed: archive the crash report.
The device_health_metrics pool was in an unknown state, with no PGs/OSDs associated with it (probably the result of unmounting a volume while it was in use during init): delete the pool and rebuild it.

rbarnsley@ceph-node-1:~$ sudo ceph crash ls
ID                                                                ENTITY  NEW  
2021-11-03T14:07:24.450255Z_a008f6e6-5ae5-4295-b27c-c2397bc27ca2  osd.0    *   
rbarnsley@ceph-node-1:~$ ceph crash archive 2021-11-03T14:07:24.450255Z_a008f6e6-5ae5-4295-b27c-c2397bc27ca2

rbarnsley@ceph-node-1:~$ ceph tell mon.* injectargs --mon_allow_pool_delete true
mon.ceph-node-1: mon_allow_pool_delete = 'true' 
mon.ceph-node-1: {}
mon.ceph-node-2: mon_allow_pool_delete = 'true' 
mon.ceph-node-2: {}
mon.ceph-node-3: mon_allow_pool_delete = 'true' 
mon.ceph-node-3: {}
rbarnsley@ceph-node-1:~$ ceph osd pool delete device_health_metrics device_health_metrics --yes-i-really-really-mean-it
pool 'device_health_metrics' removed
rbarnsley@ceph-node-1:~$ ceph tell mon.* injectargs --mon_allow_pool_delete false
mon.ceph-node-1: mon_allow_pool_delete = 'false' 
mon.ceph-node-1: {}
mon.ceph-node-2: mon_allow_pool_delete = 'false' 
mon.ceph-node-2: {}
mon.ceph-node-3: mon_allow_pool_delete = 'false' 
mon.ceph-node-3: {}
rbarnsley@ceph-node-1:~$ sudo ceph osd pool create device_health_metrics

rbarnsley@ceph-node-1:~$ sudo ceph -s
  cluster:
    id:     d24c75b0-3c9f-11ec-b92a-55ff2bd9955d
    health: HEALTH_OK
14. Create HTTP REST gateways (RGWs) for Ceph:
rbarnsley@ceph-node-1:~$ sudo ceph orch apply rgw test-realm test-zone
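The gateway daemons land wherever the orchestrator schedules them; their placement can be checked once they start:

rbarnsley@ceph-node-1:~$ sudo ceph orch ps | grep rgw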
15. Create a CephFS (optional):
rbarnsley@ceph-node-1:~$ sudo ceph osd pool create cephfs_data
pool 'cephfs_data' created
rbarnsley@ceph-node-1:~$ sudo ceph osd pool create cephfs_metadata
pool 'cephfs_metadata' created
rbarnsley@ceph-node-1:~$ sudo ceph osd pool ls | grep cephfs
cephfs_data
cephfs_metadata

rbarnsley@ceph-node-1:~$ sudo ceph fs new test cephfs_metadata cephfs_data
rbarnsley@ceph-node-1:~$ sudo ceph orch apply mds test --placement="3 ceph-node-1 ceph-node-2 ceph-node-3"
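Once the MDS daemons are up, the state of the new filesystem can be checked with:

rbarnsley@ceph-node-1:~$ sudo ceph fs status test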

Adding the Rook operator to a Kubernetes cluster to provision PVs from an external Ceph cluster

The official write-up for this is clumsily worded and missing key parts: https://rook.io/docs/rook/v1.7/ceph-cluster-crd.html#external-cluster

1. Export credentials for importing the cluster into k8s:

rbarnsley@rucio-dev:~$ export NAMESPACE=rook-ceph
rbarnsley@rucio-dev:~$ export ROOK_EXTERNAL_FSID=<rook-external-fsid>
rbarnsley@rucio-dev:~$ export ROOK_EXTERNAL_CEPH_MON_DATA=a=<ceph-node-1-ip>:6789
rbarnsley@rucio-dev:~$ export ROOK_EXTERNAL_ADMIN_SECRET=<rook-external-admin-secret>
rbarnsley@rucio-dev:~$ export CSI_RBD_NODE_SECRET=<csi-rbd-node-secret>
rbarnsley@rucio-dev:~$ export CSI_RBD_PROVISIONER_SECRET=<csi-rbd-provisioner-secret>
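The values for these variables come from the Ceph side. The FSID and admin key can be read directly; the two CSI users are created with ceph auth. The capability profiles below are a sketch based on the Rook external-cluster guide, so check them against the linked documentation before use:

rbarnsley@ceph-node-1:~$ sudo ceph fsid
rbarnsley@ceph-node-1:~$ sudo ceph auth get-key client.admin
rbarnsley@ceph-node-1:~$ sudo ceph auth get-or-create-key client.csi-rbd-node mon 'profile rbd' osd 'profile rbd'
rbarnsley@ceph-node-1:~$ sudo ceph auth get-or-create-key client.csi-rbd-provisioner mon 'profile rbd' mgr 'allow rw' osd 'profile rbd'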
2. Set up the prerequisites for the cluster:
rbarnsley@rucio-dev:~$ git clone https://github.com/rook/rook.git
rbarnsley@rucio-dev:~$ kubectl create namespace rook-ceph
rbarnsley@rucio-dev:~$ bash rook/cluster/examples/kubernetes/ceph/import-external-cluster.sh
rbarnsley@rucio-dev:~$ kubectl create -f rook/cluster/examples/kubernetes/ceph/crds.yaml -f rook/cluster/examples/kubernetes/ceph/operator.yaml -f rook/cluster/examples/kubernetes/ceph/common.yaml
rbarnsley@rucio-dev:~$ vim rook/cluster/examples/kubernetes/ceph/common-external.yaml (rename occurrences of the rook-ceph-external namespace to rook-ceph, as described in the file's boilerplate)
rbarnsley@rucio-dev:~$ kubectl apply -f rook/cluster/examples/kubernetes/ceph/common-external.yaml
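Before moving on, it is worth checking that the operator pod has come up in the rook-ceph namespace:

rbarnsley@rucio-dev:~$ kubectl -n rook-ceph get pods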
3. Deploy a CephCluster:
rbarnsley@rucio-dev:~$ vim rook/cluster/examples/kubernetes/ceph/cluster-external.yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph-external
  namespace: rook-ceph
spec:
  external:
    enable: true
  crashCollector:
    disable: true
  healthCheck:
    daemonHealth:
      mon:
        disabled: false
        interval: 45s
rbarnsley@rucio-dev:~$ kubectl create -f rook/cluster/examples/kubernetes/ceph/cluster-external.yaml

4. CRITICAL: edit the rook-ceph-mon secret after deployment and remove any empty fields, otherwise the cluster gets stuck in the "Connecting" phase (the error is visible in the operator logs):

rbarnsley@rucio-dev:~$ kubectl edit secrets rook-ceph-mon -o yaml
5. Ensure the cluster is in the "Connected" state:
rbarnsley@k8s-dev-head:~$ kubectl get CephCluster -n rook-ceph
NAME                 DATADIRHOSTPATH  MONCOUNT  AGE  PHASE      MESSAGE                         HEALTH     EXTERNAL
rook-ceph-external                              87s  Connected  Cluster connected successfully  HEALTH_OK  true

Creating a storage class / PVC

1. Create the pool on ceph-node-1 and set its application type to rbd:

rbarnsley@ceph-node-1:~$ ceph osd pool create replicated_500G
pool 'replicated_500G' created

rbarnsley@ceph-node-1:~$ ceph osd pool set-quota replicated_500G max_bytes 500G
set-quota max_bytes = 536870912000 for pool replicated_500G

rbarnsley@ceph-node-1:~$ ceph osd pool application enable replicated_500G rbd
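The quota and application tag can be confirmed afterwards:

rbarnsley@ceph-node-1:~$ ceph osd pool get-quota replicated_500G
rbarnsley@ceph-node-1:~$ ceph osd pool ls detail | grep replicated_500G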
2. Create a new storage class (sc.yml) on the k8s cluster and apply it:
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-block-ext
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
   clusterID: rook-ceph
   pool: replicated_500G

   imageFormat: "2"

   imageFeatures: layering

   csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
   csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
   csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
   csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
   csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
   csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph

   csi.storage.k8s.io/fstype: ext4

reclaimPolicy: Delete
allowVolumeExpansion: true

Note: provisioning creates an rbd image in the pool automatically, so we don't need to create one manually.

rbarnsley@k8s-dev-head:~$ kubectl create -f sc.yml
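A quick check that the storage class was registered:

rbarnsley@k8s-dev-head:~$ kubectl get sc rook-ceph-block-ext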
3. Create a PVC (pvc.yml) that uses this storage class and apply it:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ceph-ext
  namespace: testing
spec:
  storageClassName: rook-ceph-block-ext
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 500Gi
rbarnsley@k8s-dev-head:~$ kubectl create -f pvc.yml
rbarnsley@k8s-dev-head:~$ kubectl get pvc -n testing
NAME       STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS          AGE
ceph-ext   Bound    pvc-3a823604-301f-437d-a632-6a0f5ec32d3f   500Gi      RWO            rook-ceph-block-ext   7s
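Since provisioning creates the backing rbd image, it should also now be visible in the pool on the Ceph side:

rbarnsley@ceph-node-1:~$ rbd ls replicated_500G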

Testing with a dummy pod (optional)

Check the pool usage before the test:

rbarnsley@ceph-node-1:~$ ceph df
--- POOLS ---
POOL             ID  PGS  STORED  OBJECTS  USED     %USED  MAX AVAIL
replicated_500G   9   32   9 MiB      548  3.2 GiB   0.08    1.4 TiB
Create a dummy pod (dummy.yml) that mounts the PVC:
apiVersion: v1
kind: Pod
metadata:
  name: nginx-test
  namespace: testing
spec:
  volumes:
    - name: mystorage
      persistentVolumeClaim:
        claimName: ceph-ext
  containers:
    - name: task-pv-container
      image: quay.io/bitnami/nginx
      ports:
        - containerPort: 80
          name: "http-server"
      volumeMounts:
        - mountPath: "/usr/share/nginx/html"
          name: mystorage
rbarnsley@k8s-dev-head:~$ kubectl create -f dummy.yml
rbarnsley@k8s-dev-head:~$ kubectl exec -it -n testing nginx-test -- /bin/bash
I have no name!@nginx-test:/$ curl http://ipv4.download.thinkbroadband.com/1GB.zip --output /usr/share/nginx/html/1000.zip
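Still inside the pod, df should show the ~500G rbd-backed filesystem mounted at the html path:

I have no name!@nginx-test:/$ df -h /usr/share/nginx/html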
Back on ceph-node-1, the downloaded data now shows up in the pool:

rbarnsley@ceph-node-1:~$ ceph df
--- POOLS ---
POOL             ID  PGS  STORED   OBJECTS  USED     %USED  MAX AVAIL
replicated_500G   9   32  1.1 GiB      548  3.2 GiB   0.08    1.4 TiB
